Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!

  1. PyYAML (YAML 1.1 currently) and ruamel.yaml (YAML 1.2) are two Python libraries for parsing YAML. PyYAML is more widely used.

  2. PyYAML is preferred over json for serialization and deserialization for multiple reasons.

    • YAML is (nearly) a superset of JSON, so PyYAML can also parse most JSON documents.

    • PyYAML supports serializing and deserializing sets, while JSON does not.

    • YAML is more readable than JSON.

!pip3 install pyyaml
Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/site-packages (5.3.1)
import datetime
import yaml
doc = """
  a: 1
  b:
    c: 3
    d: "山不在高"
"""
dic = yaml.load(doc, Loader=yaml.FullLoader)
dic
{'a': 1, 'b': {'c': 3, 'd': '山不在高'}}
print(yaml.dump(dic, encoding="utf-8").decode())
a: 1
b:
  c: 3
  d: "\u5C71\u4E0D\u5728\u9AD8"

print(yaml.dump(dic, allow_unicode=True))
a: 1
b:
  c: 3
  d: 山不在高

print(yaml.dump(dic, allow_unicode=True, encoding="").decode())
a: 1
b:
  c: 3
  d: 山不在高

# Round-trip `dic` through a file. Context managers guarantee that the
# written data is flushed and the handle closed before the file is read back,
# and that neither handle is leaked.
with open("test.yml", "w") as fout:
    yaml.dump(dic, fout)
with open("test.yml") as fin:
    round_trip = yaml.load(fin, Loader=yaml.FullLoader)
round_trip
{'a': 1, 'b': {'c': 3, 'd': '山不在高'}}

Read YAML from a String

doc = """
- 
    cal_dt: 2019-01-01
- 
    cal_dt: 2019-01-02
    
    
"""
yaml.load(doc, Loader=yaml.FullLoader)
[{'cal_dt': datetime.date(2019, 1, 1)}, {'cal_dt': datetime.date(2019, 1, 2)}]

Read YAML From File (Single Doc)

with open("items.yaml") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
print(data)
{'raincoat': 1, 'coins': 5, 'books': 23, 'spectacles': 2, 'chairs': 12, 'pens': 6}
!cat set.yaml
!!set
1: null
2: null
3: null
with open("set.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
data
{1, 2, 3}
type(data)
set

Read YAML (Multiple Docs)

Notice that the method yaml.load_all returns a generator!

with open("data.yaml") as f:
    docs = yaml.load_all(f, Loader=yaml.FullLoader)
    for doc in docs:
        for k, v in doc.items():
            print(k, "->", v)
cities -> ['Bratislava', 'Kosice', 'Trnava', 'Moldava', 'Trencin']
companies -> ['Eset', 'Slovnaft', 'Duslo Sala', 'Matador Puchov']

Convert the generator to a list so that you can use it outside the with block (the generator reads lazily from the file, which is closed once the block exits).

with open("data.yaml") as f:
    docs = list(yaml.load_all(f, Loader=yaml.FullLoader))
docs
[{'cities': ['Bratislava', 'Kosice', 'Trnava', 'Moldava', 'Trencin']}, {'companies': ['Eset', 'Slovnaft', 'Duslo Sala', 'Matador Puchov']}]
for doc in docs:
    for k, v in doc.items():
        print(k, "->", v)
cities -> ['Bratislava', 'Kosice', 'Trnava', 'Moldava', 'Trencin']
companies -> ['Eset', 'Slovnaft', 'Duslo Sala', 'Matador Puchov']

YAML Dump to String

users = [
    {
        "name": "John Doe", 
        "occupation": "gardener",
        "dob": datetime.datetime.now(),
    },
    {"name": "Lucy Black", "occupation": "teacher"},
]

print(yaml.dump(users, sort_keys=False))
- name: John Doe
  occupation: gardener
  dob: 2026-04-03 12:06:44.864403
- name: Lucy Black
  occupation: teacher

print(yaml.dump(set([1, 2, 3]), sort_keys=False))
!!set
1: null
2: null
3: null

YAML Dump to File

with open("users.yaml", "w") as fout:
    yaml.dump(users, fout, sort_keys=False)
with open("set.yaml", "w") as fout:
    yaml.dump(set([1, 2, 3]), fout, sort_keys=False)
!cat set.yaml
!!set
1: null
2: null
3: null

Tokens

PyYAML can work with a lower-level API when parsing YAML files. The method scan scans a YAML stream and produces scanning tokens.

The following example scans and prints tokens.

with open("items.yaml") as f:
    data = yaml.scan(f, Loader=yaml.FullLoader)
    for token in data:
        print(token)
StreamStartToken(encoding=None)
BlockMappingStartToken()
KeyToken()
ScalarToken(plain=True, style=None, value='raincoat')
ValueToken()
ScalarToken(plain=True, style=None, value='1')
KeyToken()
ScalarToken(plain=True, style=None, value='coins')
ValueToken()
ScalarToken(plain=True, style=None, value='5')
KeyToken()
ScalarToken(plain=True, style=None, value='books')
ValueToken()
ScalarToken(plain=True, style=None, value='23')
KeyToken()
ScalarToken(plain=True, style=None, value='spectacles')
ValueToken()
ScalarToken(plain=True, style=None, value='2')
KeyToken()
ScalarToken(plain=True, style=None, value='chairs')
ValueToken()
ScalarToken(plain=True, style=None, value='12')
KeyToken()
ScalarToken(plain=True, style=None, value='pens')
ValueToken()
ScalarToken(plain=True, style=None, value='6')
BlockEndToken()
StreamEndToken()

Fix Indentation Issue

PyYAML currently has an indentation issue when dumping lists. For details, please refer to the PyYAML issue "Incorrect indentation with lists" (#234). The custom Dumper below works around it.

class Dumper(yaml.Dumper):
    """A ``yaml.Dumper`` variant that never emits "indentless" blocks.

    Intended as the workaround for the list-indentation problem described
    in the surrounding notes (PyYAML issue #234): pass it to ``yaml.dump``
    via ``Dumper=Dumper``.
    """

    def increase_indent(self, flow=False, *args, **kwargs):
        """Delegate to the parent implementation with ``indentless=False``.

        Only ``flow`` is forwarded; any additional positional or keyword
        arguments (including a caller-supplied ``indentless``) are accepted
        but deliberately ignored so that the indentless mode is always off.
        """
        # Second positional parameter of the parent method is `indentless`.
        return super().increase_indent(flow, False)


yaml.dump(data, Dumper=Dumper)

Examples

with open("ex1.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
print(data)
{'args': [{'cal_dt': '2019-01-01', 'path': '/path/1'}, {'cal_dt': '2019-01-02', 'path': '/path/2'}]}
with open("ex2.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
print(data)
[{'cal_dt': datetime.date(2019, 1, 1), 'path': '/path/1'}, {'cal_dt': datetime.date(2019, 1, 2), 'path': '/path/2'}]
type(data[0]["cal_dt"])
datetime.date
with open("ex3.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
print(data)
{'args': {'x': [1, 2, 3], 'y': ['a', 'b', 'c']}}
with open("ex4.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
print(data)
{'x': [1, 2, 3], 'y': ['a', 'b', 'c']}
with open("ex5.yaml", "r") as fin:
    data = yaml.load(fin, Loader=yaml.FullLoader)
data
{'x': [1, 2, 3], 'y': "import dsutil\ndsutil.datetime.range('2019-01-01', '2019-01-05')"}
data["y"]
"import dsutil\ndsutil.datetime.range('2019-01-01', '2019-01-05')"
eval(compile(data["y"], "some_file", "exec"))
x = eval("range(8)")
x
range(0, 8)
import json

json.dumps(list(x))
'[0, 1, 2, 3, 4, 5, 6, 7]'
list(exec(data["y"]))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-34-42675847b9bb> in <module>
----> 1 list(exec(data['y']))

TypeError: 'NoneType' object is not iterable

eval, exec, single, compile

  1. simple 1 line python code which requires you to have every library ready ...

  2. multiple: need a way to reliably run the code and return the result ...

yaml.load("""!!python/list(range(10))""", Loader=yaml.FullLoader)