{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "polish-inquiry", "metadata": {}, "outputs": [], "source": [ "from masskit.utils.tablemap import ArrowLibraryMap\n", "from masskit.data_specs.spectral_library import display_masskit_df\n", "from masskit.test_fixtures.demo_fixtures import cho_uniq_short_parquet\n", "from masskit.data_specs.file_schemas import display_drop_fields" ] }, { "attachments": {}, "cell_type": "markdown", "id": "dutch-vehicle", "metadata": {}, "source": [ "# Tables of spectra\n", "A spectral library is a table of spectrum objects and associated data (nce, precursor charge, etc.), arranged so that there is one spectrum per row and one column per piece of associated data. In Masskit, spectral libraries are kept in pandas dataframes and arrow tables stored in the parquet format. The arrow tables are wrapped in objects of class ArrowLibraryMap.\n" ] }, { "attachments": {}, "cell_type": "markdown", "id": "intelligent-commonwealth", "metadata": {}, "source": [ "## Pandas dataframes\n", "### Load a pandas dataframe from the parquet version of a spectral library" ] }, { "cell_type": "code", "execution_count": 2, "id": "economic-attitude", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:created chunk 1 with 100 records\n", "INFO:root:processing batch 0 with size 100\n", "INFO:root:created chunk 1 with 0 records\n" ] } ], "source": [ "table = ArrowLibraryMap.from_parquet(cho_uniq_short_parquet())\n", "df = table.table.to_pandas().drop(columns=display_drop_fields, errors='ignore').head(10)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "exclusive-bosnia", "metadata": {}, "source": [ "### Take the top 10 lines of the dataframe and display in html" ] }, { "cell_type": "code", "execution_count": 3, "id": "intelligent-europe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idinstrumentinstrument_typeinstrument_modelion_modeionizationnamecasnosynonymsscancollision_energyretention_timecollision_gasinsource_voltagesample_inletevncechargeprecursor_mzexact_massexact_mwsetcompositionpeptidepeptide_lenpeptide_typemod_namesmod_positionsprotein_idspectrum
0NoneNoneNoneNoneNoneAAAACALTPGPLADLAAR/2_1(4,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone46.0035.002855.45NaNNaNNaNbestofAAAACALTPGPLADLAAR18tryptic[4][4]None\"spectrum
1NoneNoneNoneNoneNoneAAAACALTPGPLADLAAR/2_1(4,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone53.0035.002855.45NaNNaNNaNbestofAAAACALTPGPLADLAAR18tryptic[4][4]None\"spectrum
2NoneNoneNoneNoneNoneAAAAGQTGTVPPGAPGALPLPGMAIVK/2NoneNoneNoneNaNNaNNoneNaNNone76.0035.0021207.17NaNNaNNaNbestofAAAAGQTGTVPPGAPGALPLPGMAIVK27semitryptic[][]None\"spectrum
3NoneNoneNoneNoneNoneAAAAGSTSVKPIFSR/2NoneNoneNoneNaNNaNNoneNaNNone44.0034.002731.90NaNNaNNaNbestofAAAAGSTSVKPIFSR15semitryptic[][]None\"spectrum
4NoneNoneNoneNoneNoneAAAAGSTSVKPIFSR/3NoneNoneNoneNaNNaNNoneNaNNone28.0034.003488.27NaNNaNNaNbestofAAAAGSTSVKPIFSR15semitryptic[][]None\"spectrum
5NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/2_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone50.0034.002830.88NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
6NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/2_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone52.0035.002830.88NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
7NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/3_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone32.0035.003554.26NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
8NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEKETQEK/3_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone32.0035.003759.35NaNNaNNaNbestofAAAALGSHGSCSSEVEKETQEK22tryptic[4][10]None\"spectrum
9NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEKETQEK/3_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone43.0034.003759.35NaNNaNNaNbestofAAAALGSHGSCSSEVEKETQEK22tryptic[4][10]None\"spectrum
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display_masskit_df(df)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "brave-height", "metadata": {}, "source": [ "### List columns" ] }, { "cell_type": "code", "execution_count": 4, "id": "ranking-pickup", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'instrument', 'instrument_type', 'instrument_model', 'ion_mode',\n", " 'ionization', 'name', 'casno', 'synonyms', 'scan', 'collision_energy',\n", " 'retention_time', 'collision_gas', 'insource_voltage', 'sample_inlet',\n", " 'ev', 'nce', 'charge', 'precursor_mz', 'exact_mass', 'exact_mw', 'set',\n", " 'composition', 'peptide', 'peptide_len', 'peptide_type', 'mod_names',\n", " 'mod_positions', 'protein_id', 'spectrum'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "attachments": {}, "cell_type": "markdown", "id": "constant-texas", "metadata": {}, "source": [ "### Iterating through a dataframe" ] }, { "cell_type": "code", "execution_count": 5, "id": "guided-rings", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AAAACALTPGPLADLAAR 35.0\n", "AAAACALTPGPLADLAAR 35.0\n", "AAAAGQTGTVPPGAPGALPLPGMAIVK 35.0\n", "AAAAGSTSVKPIFSR 34.0\n", "AAAAGSTSVKPIFSR 34.0\n", "AAAALGSHGSCSSEVEK 34.0\n", "AAAALGSHGSCSSEVEK 35.0\n", "AAAALGSHGSCSSEVEK 35.0\n", "AAAALGSHGSCSSEVEKETQEK 35.0\n", "AAAALGSHGSCSSEVEKETQEK 34.0\n" ] } ], "source": [ "for row in df.head(10).itertuples():\n", " print (row.peptide, row.nce)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "legislative-tourism", "metadata": {}, "source": [ "### Query on the dataframe to return a dataframe containing rows that match the query, then display the new dataframe" ] }, { "cell_type": "code", "execution_count": 6, "id": "likely-circle", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idinstrumentinstrument_typeinstrument_modelion_modeionizationnamecasnosynonymsscancollision_energyretention_timecollision_gasinsource_voltagesample_inletevncechargeprecursor_mzexact_massexact_mwsetcompositionpeptidepeptide_lenpeptide_typemod_namesmod_positionsprotein_idspectrum
5NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/2_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone50.0034.002830.88NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
6NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/2_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone52.0035.002830.88NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
7NoneNoneNoneNoneNoneAAAALGSHGSCSSEVEK/3_1(10,C,CAM)NoneNoneNoneNaNNaNNoneNaNNone32.0035.003554.26NaNNaNNaNbestofAAAALGSHGSCSSEVEK17tryptic[4][10]None\"spectrum
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_single_peptide = df.query('peptide == \"AAAALGSHGSCSSEVEK\"')\n", "display_masskit_df(df_single_peptide)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "assigned-welcome", "metadata": {}, "source": [ "## Select one spectrum from the new dataframe, filter out low m/z peaks, normalize the base intensity to 1.0" ] }, { "cell_type": "code", "execution_count": 7, "id": "b6baab7f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "masskit.spectrum.spectrum.Spectrum" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df_single_peptide.iloc[0]['spectrum'])" ] }, { "cell_type": "code", "execution_count": 8, "id": "disturbed-rwanda", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAADICAYAAADGFbfiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAa70lEQVR4nO3dfVAU9/0H8PepB1iEQ0XgLvKkxsQKPkGqmKrEVAg+R2vVOKi12CHGJ9CaaMcaTRszjlEnVYNOfKI2o2NRo5Ua0KKSEYvyUBUfQkYCFO9EqTx4Kpzc9/cHP6+cHAjL3R4H79fMztx997u7n/1yw5vdW3YVQggBIiKiFupk7wKIiMgxMUCIiEgSBggREUnCACEiIkkYIEREJAkDhIiIJGGAEBGRJAwQIiKShAFCRESSMECIiEgSBggREUnCACEiIkkYIEREJEkXexfgKIxGI+7evQs3NzcoFAp7l0NE1GpCCFRVVUGj0aBTp5YfTzBAmunu3bvw9fW1dxlERFZXXFyM3r17t3g5Bkgzubm5AagbaHd3dztXQ0TUepWVlfD19TX9fmspBkgzPT9t5e7uzgAhonZF6ml5folORESSMECIiEgSBggREUnCACEiIkkcMkAuXLiASZMmQaPRQKFQ4Pjx4y9d5vz58wgJCYGLiwv69OmDhIQE2xdKRNSOOWSA6PV6DB48GNu3b29W/4KCAowfPx6jRo1CTk4O1qxZg6VLlyIpKcnGlRIRtV8OeRlvVFQUoqKimt0/ISEBfn5+2LZtGwBgwIABuHLlCjZv3ozp06fbqEoiovbNIY9AWiojIwMRERFmbZGRkbhy5QoMBoOdqiIicmwOeQTSUjqdDt7e3mZt3t7eePbsGR48eAC1Wt1gmerqalRXV5veV1ZW2rxOIiJH0iGOQICG/2kphLDY/tzGjRuhUqlME++DRe2NXg8oFHWTXm/vasgRdYgA8fHxgU6nM2srLS1Fly5d0LNnT4vLrF69GhUVFaapuLhYjlKJiBxGhziFFRYWhpMnT5q1paSkIDQ0FEql0uIyzs7OcHZ2lqM8IiKH5JBHII8ePUJubi5yc3MB1F2mm5ubi6KiIgB1Rw9z58419Y+NjUVhYSHi4+Nx8+ZN7N27F3v27MHKlSvtUT4RUbv
gkEcgV65cwVtvvWV6Hx8fDwCYN28e9u/fD61WawoTAAgMDERycjLi4uKwY8cOaDQafPHFF7yEl4ioFRTi+bfJ1KTKykqoVCpUVFTwdu7ULuj1QLduda8fPQJcXe1bD8mvtb/XHPIUFhER2R8DhIiIJGGAEBGRJAwQIiKShAFCRESSMECIiEgSBggREUnCACEiIkkYIEREJAkDhIiIJGGAEBGRJAwQIiKShAFCRESSMECIiEgSBggREUnCACGidkuvBxSKukmvt3c17Q8DhIiIJGGAEFG7wSMOeckaIPPnz8eFCxfk3CQREdmIrAFSVVWFiIgIvPrqq/j0009RUlIi5+aJiMiKZA2QpKQklJSUYPHixThy5AgCAgIQFRWFv/3tbzAYDHKWQkRErST7dyA9e/bEsmXLkJOTg8zMTPTr1w/R0dHQaDSIi4tDfn6+3CUREZEEdvsSXavVIiUlBSkpKejcuTPGjx+PvLw8/PSnP8XWrVvtVRYRETWTrAFiMBiQlJSEiRMnwt/fH0eOHEFcXBy0Wi0OHDiAlJQU/OUvf8GGDRvkLIuIiCToIufG1Go1jEYjZs+ejczMTAwZMqRBn8jISHh4eMhZFhERSSBrgGzduhUzZsyAi4tLo326d++OgoICGasiIiIpZD2FlZaWZvFqK71ejwULFshZChERtZKsAXLgwAE8efKkQfuTJ0+QmJgoZylERNRKspzCqqyshBACQghUVVWZncKqra1FcnIyvLy85CiFiIisRJYA8fDwgEKhgEKhQP/+/RvMVygUWL9+vRylEBGRlcgSIGlpaRBCYOzYsUhKSkKPHj1M85ycnODv7w+NRiNHKUREZCWyBMiYMWMAAAUFBfDz84NCoZBjs0TUBuj1QLduda8fPQJcXe1bD1mPzQPk6tWrCAoKQqdOnVBRUYFr16412nfQoEG2LoeIiKzE5gEyZMgQ6HQ6eHl5YciQIVAoFBBCNOinUChQW1tr63KIiMhKbB4gBQUF6NWrl+k1ERG1DzYPEH9/f4uviYjIscn+j4SnTp0yvV+1ahU8PDwwcuRIFBYWylkKERG1kqwB8umnn6Jr164AgIyMDGzfvh2bNm2Cp6cn4uLi5CyFiIhaSdabKRYXF6Nfv34AgOPHj+OXv/wlfvvb3+LNN99EeHi4nKUQEVEryXoE0q1bN5SVlQEAUlJS8Itf/AIA4OLiYvEeWURE1HbJegQybtw4xMTEYOjQofj+++8xYcIEAEBeXh4CAgLkLIWIiFpJ1iOQHTt2ICwsDPfv30dSUhJ69uwJAMjKysLs2bPlLIWIiFpJISz9Vx81UFlZCZVKhYqKCri7u9u7HKJWk+sWI3LeyuTFbQG8jUpTWvt7TdZTWABQXl6OzMxMlJaWwmg0mtoVCgWio6PlLoeIiCSSNUBOnjyJOXPmQK/Xw83NzeymigwQIiLHIut3ICtWrMCCBQtQVVWF8vJyPHz40DT997//lbMUIiJqJVkDpKSkBEuXLsVPfvITOTdLREQ2IGuAREZG4sqVK1ZZ186dOxEYGAgXFxeEhIQgPT290b7nzp0zPRGx/nTr1i2r1EJE1BHJ+h3IhAkT8Lvf/Q43btxAcHAwlEql2fzJkyc3az2HDx/G8uXLsXPnTrz55pvYtWsXoqKicOPGDfj5+TW63O3bt82uNHh+l2Aiah/0evPXvOrKtmS9jLdTp8YPeFryPJDhw4dj2LBh+PLLL01tAwYMwNSpU7Fx48YG/c+dO4e33noLDx8+hIeHR4vrBngZL7U/7fEy3tJSwNu77vW9e3Xb4mW8jWvt7zVZT2EZjcZGp+aGR01NDbKyshAREWHWHhERgYsXLza57NChQ6FWq/H2228jLS2tyb7V1dWorKw0m4iI6H9kDZD6nj59Kmm5Bw8eoLa2Ft7P/8z4f97e3tDpdBaXUavV2L17N5KSknD06FG89tprePvtt3HhwoVGt7Nx40aoVCrT5OvrK6leIqL2StYAqa2txSe
ffIJXXnkF3bp1w507dwAAa9euxZ49e1q0rvr/QwIAQogGbc+99tprWLhwIYYNG4awsDDs3LkTEyZMwObNmxtd/+rVq1FRUWGaiouLW1QfERFQdwpPoaib6n9H0x7IGiB/+tOfsH//fmzatAlOTk6m9uDgYHz11VfNWoenpyc6d+7c4GijtLS0wVFJU0aMGIH8/PxG5zs7O8Pd3d1sIiJqL/R6QKVq3TpkDZDExETs3r0bc+bMQefOnU3tgwYNavYltU5OTggJCUFqaqpZe2pqKkaOHNnsWnJycqBWq5vdn4iIzMl6GW9JSYnpgVL1GY1GGAyGZq8nPj4e0dHRCA0NRVhYGHbv3o2ioiLExsYCqDv9VFJSgsTERADAtm3bEBAQgIEDB6KmpgYHDx5EUlISkpKSrLNjREQdkKwBMnDgQKSnp8Pf39+s/ciRIxg6dGiz1zNz5kyUlZVhw4YN0Gq1CAoKQnJysmm9Wq0WRUVFpv41NTVYuXIlSkpK0LVrVwwcOBCnTp3C+PHjrbNjREQdkKwBsm7dOkRHR6OkpARGoxFHjx7F7du3kZiYiL///e8tWteiRYuwaNEii/P2799v9n7VqlVYtWqV1LKJiMgCWb8DmTRpEg4fPozk5GQoFAr84Q9/wM2bN3Hy5EmMGzdOzlKIWqw9X01DbYOjfcZkfx5IZGQkIiMj5d4sERFZmaxHIH369EFZWVmD9vLycvTp00fOUoionm7dHOMvXntwtKMCOckaID/++KPFW5ZUV1ejpKREzlKIqAN48eaKHYkcwSfLKawTJ06YXn/77bdQ1fvvldraWpw9exYBAQFylEJERFYiS4BMnToVQN3tR+bNm2c2T6lUIiAgAJ9//rkcpRARkZXIEiBGoxEAEBgYiMuXL8PT01OOzRJZTf1bkhNRHVmvwiooKJBzc0REZEOyX8Z79uxZnD17FqWlpaYjk+f27t0rdzlEZGN8SmD7JWuArF+/Hhs2bEBoaCjUanWjt18nIqK2T9YASUhIwP79+xEdHS3nZomIOhS5vrOT9f9AampqWnTLdSIiartkDZCYmBh8/fXXcm6SiIhsRNZTWE+fPsXu3btx5swZDBo0CEql0mz+li1b5CyHiEhW9U8tPXrk+BcUyBogV69exZAhQwAA169fl3PTRERkZbIGSFpampybIyIiG5IlQKZNm/bSPgqFgo+YJSJyILIESP2bJxJJZa/zx7a+JLK9nRd/mY62v+2ZLAGyb98+OTZDJDv+MrQ9jnHbHQNZL+MlIqL2gwFCRO2St3fHe4iU3BggREQkCQOEiIgkYYAQUZOkPFtbjudxk/0xQIiImvDi80yau4xcAWrPsGaAENmZlF9QLy7Pv/bJHhgg1K7xlyt1JN26yftZZ4AQEZEkDBAiIpKEAUJERJIwQIiISBIGCBERScIAISIiSRggREQkCQOEiIgkYYAQEZEkDBAiIpKEAUJERJIwQIiISBIGCBERScIAISIiSRggREQkCQOEiIgkYYAQEZEkDBAiIpKEAUJERJI4bIDs3LkTgYGBcHFxQUhICNLT05vsf/78eYSEhMDFxQV9+vRBQkKCTJUSEbVPDhkghw8fxvLly/H73/8eOTk5GDVqFKKiolBUVGSxf0FBAcaPH49Ro0YhJycHa9aswdKlS5GUlCRz5URE7YdDBsiWLVvwm9/8BjExMRgwYAC2bdsGX19ffPnllxb7JyQkwM/PD9u2bcOAAQMQExODBQsWYPPmzTJXTkTUfjhcgNTU1CArKwsRERFm7REREbh48aLFZTIyMhr0j4yMxJUrV2AwGGxWKxFRe9bF3gW01IMHD1BbWwtvb2+zdm9vb+h0OovL6HQ6i/2fPXuGBw8eQK1WN1imuroa1dXVpvcVFRUAgMrKytbuAkmk1//vdWUlUFtrm2WaWseL67LG+quqzF+7uEivryU1vLhfjS3b2nF/cf/qL9+c9bVk+/W3ZWnbLR1bqetoav+fs7Qvje1rU2N
g6fP54jg0vq2632dCCIv78TIOFyDPKRQKs/dCiAZtL+tvqf25jRs3Yv369Q3afX19W1oq2YBGI88yLVmXNdbfr1/rlm9NDc1ZVsr6Bw/+3+sX96+l62tp/6a2LYWUddRfpv7rl+1LY/ObMwYtHeeqqiqoVKqXr/gFDhcgnp6e6Ny5c4OjjdLS0gZHGc/5+PhY7N+lSxf07NnT4jKrV69GfHy86X15eTn8/f1RVFQkaaDtqbKyEr6+viguLoa7u7u9y2k21i0/R62ddUsjhEBVVRU0Ev/ycLgAcXJyQkhICFJTU/Huu++a2lNTUzFlyhSLy4SFheHkyZNmbSkpKQgNDYVSqbS4jLOzM5ydnRu0q1Qqh/qA1ufu7u6QtbNu+Tlq7ay75VrzB7HDfYkOAPHx8fjqq6+wd+9e3Lx5E3FxcSgqKkJsbCyAuqOHuXPnmvrHxsaisLAQ8fHxuHnzJvbu3Ys9e/Zg5cqV9toFIiKH53BHIAAwc+ZMlJWVYcOGDdBqtQgKCkJycjL8/f0BAFqt1ux/QgIDA5GcnIy4uDjs2LEDGo0GX3zxBaZPn26vXSAicngOGSAAsGjRIixatMjivP379zdoGzNmDLKzsyVvz9nZGevWrbN4Wqutc9TaWbf8HLV21m0fCiH1+i0iIurQHPI7ECIisj8GCBERScIAISIiSRggzdTS28fb0saNG/HGG2/Azc0NXl5emDp1Km7fvm3WZ/78+VAoFGbTiBEjzPpUV1djyZIl8PT0hKurKyZPnoz//Oc/Nq39448/blCXj4+Pab4QAh9//DE0Gg26du2K8PBw5OXl2b3ugICABnUrFAp88MEHANrWeF+4cAGTJk2CRqOBQqHA8ePHzeZba4wfPnyI6OhoqFQqqFQqREdHo7y83CZ1GwwGfPjhhwgODoarqys0Gg3mzp2Lu3fvmq0jPDy8wc9h1qxZdqsbsN5nw9p1WwMDpBlaevt4Wzt//jw++OADXLp0CampqXj27BkiIiKgf+GmOO+88w60Wq1pSk5ONpu/fPlyHDt2DIcOHcJ3332HR48eYeLEiaiVckOnFhg4cKBZXdeuXTPN27RpE7Zs2YLt27fj8uXL8PHxwbhx41BV7+Y+9qj78uXLZjWnpqYCAGbMmGHq01bGW6/XY/Dgwdi+fbvF+dYa4/feew+5ubk4ffo0Tp8+jdzcXERHR9uk7sePHyM7Oxtr165FdnY2jh49iu+//x6TJ09u0HfhwoVmP4ddu3aZzZez7ues8dmwdt1WIeilfvazn4nY2Fizttdff1189NFHdqrIXGlpqQAgzp8/b2qbN2+emDJlSqPLlJeXC6VSKQ4dOmRqKykpEZ06dRKnT5+2Wa3r1q0TgwcPtjjPaDQKHx8f8dlnn5nanj59KlQqlUhISLBr3S9atmyZ6Nu3rzAajUKItjveAMSxY8dM7601xjdu3BAAxKVLl0x9MjIyBABx69Ytq9dtSWZmpgAgCgsLTW1jxowRy5Yta3QZe9Rtjc+GreuWikcgLyHl9vFye36n4B49epi1nzt3Dl5eXujfvz8WLlyI0tJS07ysrCwYDAaz/dJoNAgKCrL5fuXn50Oj0SAwMBCzZs3CnTt3ANQ9+Eun05nV5OzsjDFjxphqsmfdz9XU1ODgwYNYsGCB2c042+p412etMc7IyIBKpcLw4cNNfUaMGAGVSiXb/lRUVEChUMDDw8Os/a9//Ss8PT0xcOBArFy50uzIyl51t/az0RbG2xKH/UdCuUi5fbychBCIj4/Hz3/+cwQFBZnao6KiMGPGDPj7+6OgoABr167F2LFjkZWVBWdnZ+h0Ojg5OaF79+5m67P1fg0fPhyJiYno378/7t27hz/+8Y8YOXIk8vLyTNu1NNaFhYUAYLe66zt+/DjKy8sxf/58U1tbHe8XWWuMdTodvLy8Gqzfy8tLlv15+vQpPvroI7z33ntm95CaM2cOAgMD4ePjg+vXr2P16tX497//bTrlaI+6rfH
ZsPd4N4YB0kwtvX28XBYvXoyrV6/iu+++M2ufOXOm6XVQUBBCQ0Ph7++PU6dOYdq0aY2uz9b7FRUVZXodHByMsLAw9O3bFwcOHDB9sShlrOX8eezZswdRUVFmdzBtq+PdGGuMsaX+cuyPwWDArFmzYDQasXPnTrN5CxcuNL0OCgrCq6++itDQUGRnZ2PYsGF2qdtanw17jXdTeArrJaTcPl4uS5YswYkTJ5CWlobevXs32VetVsPf3x/5+fkA6m5xX1NTg4cPH5r1k3u/XF1dERwcjPz8fNPVWE2Ntb3rLiwsxJkzZxATE9Nkv7Y63tYaYx8fH9y7d6/B+u/fv2/T/TEYDPjVr36FgoICpKamvvQOtsOGDYNSqTT7Odij7vqkfDbaQt2WMEBeov7t4+tLTU3FyJEj7VKTEAKLFy/G0aNH8c9//hOBgYEvXaasrAzFxcWmpy+GhIRAqVSa7ZdWq8X169dl3a/q6mrcvHkTarXadOqhfk01NTU4f/68qSZ7171v3z54eXlhwoQJTfZrq+NtrTEOCwtDRUUFMjMzTX3+9a9/oaKiwmb78zw88vPzcebMmUaf5VNfXl4eDAaD6edgj7pfJOWz0RbqtsguX907mEOHDgmlUin27Nkjbty4IZYvXy5cXV3Fjz/+aJd63n//faFSqcS5c+eEVqs1TY8fPxZCCFFVVSVWrFghLl68KAoKCkRaWpoICwsTr7zyiqisrDStJzY2VvTu3VucOXNGZGdni7Fjx4rBgweLZ8+e2az2FStWiHPnzok7d+6IS5cuiYkTJwo3NzfTWH722WdCpVKJo0ePimvXronZs2cLtVpt97qFEKK2tlb4+fmJDz/80Ky9rY13VVWVyMnJETk5OQKA2LJli8jJyTFdrWStMX7nnXfEoEGDREZGhsjIyBDBwcFi4sSJNqnbYDCIyZMni969e4vc3Fyzz311dbUQQogffvhBrF+/Xly+fFkUFBSIU6dOiddff10MHTrUbnVb87Nh7bqtgQHSTDt27BD+/v7CyclJDBs2zOySWbkBsDjt27dPCCHE48ePRUREhOjVq5dQKpXCz89PzJs3TxQVFZmt58mTJ2Lx4sWiR48eomvXrmLixIkN+ljbzJkzhVqtFkqlUmg0GjFt2jSRl5dnmm80GsW6deuEj4+PcHZ2FqNHjxbXrl2ze91CCPHtt98KAOL27dtm7W1tvNPS0ix+PubNmyeEsN4Yl5WViTlz5gg3Nzfh5uYm5syZIx4+fGiTugsKChr93KelpQkhhCgqKhKjR48WPXr0EE5OTqJv375i6dKloqyszG51W/OzYe26rYF34yUiIkn4HQgREUnCACEiIkkYIEREJAkDhIiIJGGAEBGRJAwQIiKShAFCRESSMECIiEgSBggREUnCACEiIkkYIERtXGFhIZydnVFZWWnvUojMMECI2rhvvvkG4eHhL332BZHcGCBEMgkPD8eSJUuwfPlydO/eHd7e3ti9ezf0ej1+/etfw83NDX379sU//vEPs+W++eYbTJ48GUDdU+lenAICAuywN0QMECJZHThwAJ6ensjMzMSSJUvw/vvvY8aMGRg5ciSys7MRGRmJ6OhoPH78GABQXl6O9PR0U4BotVrT9MMPP6Bfv34YPXq0PXeJOjDezp1IJuHh4aitrUV6ejoAoLa2FiqVCtOmTUNiYiKAukfNqtVqZGRkYMSIEfj666/x+eefIysry2xdQghMnz4dRUVFSE9PR9euXWXfH6Iu9i6AqCMZNGiQ6XXnzp3Rs2dPBAcHm9qeP9+6tLQUgPnpq/rWrFmDjIwMXL58meFBdsNTWEQyUiqVZu8VCoVZm0KhAAAYjUYYDAacPn0aU6ZMMVvm4MGD2Lp1K44dO4bevXvbvmiiRjBAiNqotLQ0eHh4YMiQIaa2jIwMxMTEYNeuXRgxYoT9iiMCT2ERtVknTpwwO32l0+nw7rvvYtasWYiMjIROpwNQdyqsV69e9iqTOjAegRC
1USdOnDA7fXXr1i3cu3cPBw4cgFqtNk1vvPGGHaukjoxXYRG1QdnZ2Rg7dizu37/f4HsToraCRyBEbdCzZ8/w5z//meFBbRqPQIiISBIegRARkSQMECIikoQBQkREkjBAiIhIEgYIERFJwgAhIiJJGCBERCQJA4SIiCRhgBARkSQMECIikoQBQkREkjBAiIhIEgYIERFJwgAhIiJJ/g+MbG/543eNDQAAAABJRU5ErkJggg==", "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", " \n", " 2023-07-12T14:16:43.126665\n", " image/svg+xml\n", " \n", " \n", " Matplotlib v3.6.2, https://matplotlib.org/\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_spectrum = df_single_peptide.iloc[0]['spectrum']\n", "new_spectrum = new_spectrum.filter(min_mz=500).norm(1.0)\n", "new_spectrum" ] }, { "attachments": {}, "cell_type": "markdown", "id": "obvious-welsh", "metadata": {}, "source": [ "### Input/output\n", "The spectral library in the form of a dataframe has the ability to read and write a variety of formats." ] }, { "attachments": {}, "cell_type": "markdown", "id": "decimal-regular", "metadata": {}, "source": [ "#### MSP" ] }, { "cell_type": "code", "execution_count": 9, "id": "cathedral-candidate", "metadata": {}, "outputs": [], "source": [ "# write out msp file with peptide annotations\n", "df_single_peptide['spectrum'].array.to_msp(\"peptides.msp\", annotate_peptide=True)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "prescription-baghdad", "metadata": {}, "source": [ "##### save a single spectrum at row 0 as an msp file" ] }, { "cell_type": "code", "execution_count": 10, "id": "worthy-rochester", "metadata": {}, "outputs": [], "source": [ "df_single_peptide.iloc[[0]]['spectrum'].array.to_msp(\"single_spectrum.msp\")" ] }, { "attachments": {}, "cell_type": "markdown", "id": "scientific-today", "metadata": {}, "source": [ "#### MGF" ] }, { "cell_type": "code", "execution_count": 11, "id": "vocal-failing", "metadata": {}, "outputs": [], "source": [ "df_single_peptide['spectrum'].array.to_mgf(\"peptides.msp\")" ] }, { "attachments": {}, "cell_type": "markdown", "id": 
"associate-warren", "metadata": {}, "source": [ "#### PKL (Python pickle)" ] }, { "cell_type": "code", "execution_count": 12, "id": "lucky-recovery", "metadata": {}, "outputs": [], "source": [ "df_single_peptide.to_pickle('single_peptide.pkl')" ] }, { "attachments": {}, "cell_type": "markdown", "id": "smoking-importance", "metadata": {}, "source": [ "#### CSV" ] }, { "cell_type": "code", "execution_count": 13, "id": "acceptable-insertion", "metadata": {}, "outputs": [], "source": [ "df_single_peptide.to_csv('single_peptide.csv')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.10 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" }, "vscode": { "interpreter": { "hash": "11d150ef1a59d6ee6bd3538ad9ed751649d8a614c736b8deec7e36a34a38bbb5" } } }, "nbformat": 4, "nbformat_minor": 5 }